##################################################################################################################
# Code for Results in Appendix Section A5.9: Contextualizing Results on the Differences Between Types of Lobbyists
##################################################################################################################
write.table(data.frame(Description=c("","","","Appendix Section A5.9: Contextualizing Results on the Differences Between Types of Lobbyists")),row.names=F,col.names=F,quote=F)

##################################################################################################################
# To run, please set the working directory to match the location of the folder containing all replication files.
##################################################################################################################
#setwd("Replication_PSRM_ST24")

#if not yet created, create "output" folder
if("output"%in%list.files()==F){dir.create("output")}

#Clear environment
rm(list=ls());gc();gc();gc();gc()

#If not yet installed, install packages
if(length(find.package("dplyr",quiet=T))==0){install.packages("dplyr")}
if(length(find.package("Metrics",quiet=T))==0){install.packages("Metrics")}

#Load packages
library(dplyr)
library(Metrics)

#Set POSIX locale
Sys.setlocale(locale="C")

#Load main data set
loball5ciA<-readRDS("Data/PSRM_main_dataset.rds")


loball5ciA_agg_change3 <-loball5ciA %>% 
  filter(type%in%c("inhouse","contract")) %>%
  filter(Rcomparable_total_amounts_cleaned_exp>0) %>%
  group_by(lobname_cleaned,year,type,principal_cleaned) %>%
  summarise(.groups="keep",sd_amount_noRnoexp=sd(comparable_total_amounts_cleaned_noexp,na.rm=T),
            sd_amount_noR=sd(comparable_total_amounts_cleaned_exp,na.rm=T),
            sd_hours=sd(hours,na.rm=T))
###Results
write.table(data.frame(Description=c("","Results:")),row.names=F,col.names=F,quote=F)
write.table(data.frame(Description=c("","How often, in a given year for a given client-lobbyist relationship (contract vs. in-house), the presumption of constant hours when there are constant expenditures would be incorrect.")),row.names=F,col.names=F,quote=F)
#how many times would we assume incorrectly based on no expenditure change that no hour change? 
#in-house 88% of time
write.table(data.frame(Description=c("","In-House")),row.names=F,col.names=F,quote=F)
print(length(which(loball5ciA_agg_change3$sd_amount_noR==0&loball5ciA_agg_change3$sd_hours>0&loball5ciA_agg_change3$type=="inhouse"))/length(which(loball5ciA_agg_change3$type=="inhouse"&loball5ciA_agg_change3$sd_amount_noR==0&loball5ciA_agg_change3$sd_hours>=0)))
#contract 97% of time
write.table(data.frame(Description=c("","Contract")),row.names=F,col.names=F,quote=F)
print(length(which(loball5ciA_agg_change3$sd_amount_noR==0&loball5ciA_agg_change3$sd_hours>0&loball5ciA_agg_change3$type=="contract"))/length(which(loball5ciA_agg_change3$type=="contract"&loball5ciA_agg_change3$sd_amount_noR==0&loball5ciA_agg_change3$sd_hours>=0)))

write.table(data.frame(Description=c("","Percentage of client-lobbyist-year observations (separately for in-house and contract lobbyists) that have constant expenditures but non-constant hours.")),row.names=F,col.names=F,quote=F)
# percentage of client- lobbyist-years with non-zero expenditures have constant expenditures across the two half years but non-constant hours 
# 1% of observations for in-house lobbyist
write.table(data.frame(Description=c("","In-House")),row.names=F,col.names=F,quote=F)
print(round(length(which(loball5ciA_agg_change3$sd_amount_noR==0&loball5ciA_agg_change3$sd_hours>0&loball5ciA_agg_change3$type=="inhouse"))/length(which(loball5ciA_agg_change3$type=="inhouse"&loball5ciA_agg_change3$sd_amount_noR>=0&loball5ciA_agg_change3$sd_hours>=0)),2))
# 14% of observations for contract lobbyist
write.table(data.frame(Description=c("","Contract")),row.names=F,col.names=F,quote=F)
print(round(length(which(loball5ciA_agg_change3$sd_amount_noR==0&loball5ciA_agg_change3$sd_hours>0&loball5ciA_agg_change3$type=="contract"))/length(which(loball5ciA_agg_change3$type=="contract"&loball5ciA_agg_change3$sd_amount_noR>=0&loball5ciA_agg_change3$sd_hours>=0)),2))


loball5ciA_agg_change_lg <-loball5ciA %>% 
  filter(type%in%c("contract")) %>%
  filter(Rcomparable_total_amounts_cleaned_exp>0) %>%
  group_by(lobbying_company_cleaned,principal_cleaned,halfyear,year,type) %>%
  summarise(.groups="keep",comparable_total_amounts_cleaned_noexp=sum(comparable_total_amounts_cleaned_noexp,na.rm=T),
            comparable_total_amounts_cleaned_exp=sum(comparable_total_amounts_cleaned_exp,na.rm=T),
            hours=sum(hours,na.rm=T)) %>%
  group_by(lobbying_company_cleaned,year,principal_cleaned,type) %>%
  summarise(.groups="keep",sd_amount_noRnoexp=sd(comparable_total_amounts_cleaned_noexp,na.rm=T),
            sd_amount_noR=sd(comparable_total_amounts_cleaned_exp,na.rm=T),sd_hours=sd(hours,na.rm=T))

#how many times would we assume incorrectly based on no expenditure change that no hour change? 
#27% of all the observations for lobbying firms with non-missing SDs

write.table(data.frame(Description=c("","The numbers for contract lobbyists are even higher when not attributing expenditures to individual lobbyists based on the proportion of hours worked (as in the main analysis) but instead analyzing values at the level of the lobbying firm.")),row.names=F,col.names=F,quote=F)
print(length(which(loball5ciA_agg_change_lg$sd_amount_noR==0&loball5ciA_agg_change_lg$sd_hours>0&loball5ciA_agg_change_lg$type=="contract"))/length(which(loball5ciA_agg_change_lg$type=="contract"&is.na(loball5ciA_agg_change_lg$sd_amount_noR)==F)))


loball5ciA_agg_change5<-loball5ciA %>% 
  group_by(lobname_cleaned,session,type,principal_cleaned) %>% 
  summarise(.groups="keep",meanRrate=sum(Rcomparable_total_amounts_cleaned_exp,na.rm=T)/sum(hours,na.rm=T),
            meanrate=sum(comparable_total_amounts_cleaned_exp,na.rm=T)/sum(hours,na.rm=T))

loball5ciA_agg_change6<-merge(loball5ciA,loball5ciA_agg_change5,by=intersect(colnames(loball5ciA_agg_change5),colnames(loball5ciA)),all.x=T)
loball5ciA_agg_change6$predicted_hoursR<-loball5ciA_agg_change6$Rcomparable_total_amounts_cleaned_exp/loball5ciA_agg_change6$meanRrate
#assuming constant rate of compensation for all
loball5ciA_agg_change6$predicted_hoursR_constA<-loball5ciA_agg_change6$Rcomparable_total_amounts_cleaned_exp/median(loball5ciA_agg_change6$meanRrate)
#assuming constant rate of compensation by type (contract / inhouse)
loball5ciA_agg_change6<-loball5ciA_agg_change6 %>% 
  group_by(type) %>%
  mutate(medianRrate_type=median(meanRrate,na.rm=T))

loball5ciA_agg_change6$predicted_hoursR_constT<-loball5ciA_agg_change6$Rcomparable_total_amounts_cleaned_exp/loball5ciA_agg_change6$medianRrate_type

loball5ciA_agg_change7 <-loball5ciA_agg_change6 %>% 
  filter(type%in%c("inhouse","contract")) %>%
  filter(Rcomparable_total_amounts_cleaned_exp>0) %>%
  group_by(lobname_cleaned,session,type,principal_cleaned) %>%
  summarise(.groups="keep",mean_hours=mean(hours,na.rm=T),
            rmse_hoursR=rmse(hours,predicted_hoursR),
            rmse_hoursRT=rmse(hours,predicted_hoursR_constT))


write.table(data.frame(Description=c("","Contextualizing the results by showing via a prediction exercise how even with a relatively high level of information, variation in lobbying expenditures can be misleading about the number of hours worked.")),row.names=F,col.names=F,quote=F)
write.table(data.frame(Description=c("","Contract")),row.names=F,col.names=F,quote=F)
print(round(median(loball5ciA_agg_change7$rmse_hoursR[loball5ciA_agg_change7$type=="contract"]),1))

write.table(data.frame(Description=c("","In-House")),row.names=F,col.names=F,quote=F)
print(round(median(loball5ciA_agg_change7$rmse_hoursR[loball5ciA_agg_change7$type=="inhouse"]),1))

write.table(data.frame(Description=c("","Contract")),row.names=F,col.names=F,quote=F)
print(round(median(loball5ciA_agg_change7$mean_hours[loball5ciA_agg_change7$type=="contract"]),1))

write.table(data.frame(Description=c("","In-House")),row.names=F,col.names=F,quote=F)
print(round(median(loball5ciA_agg_change7$mean_hours[loball5ciA_agg_change7$type=="inhouse"]),1))

#Appendix footnote result

write.table(data.frame(Description=c("","Overall (footnote results)")),row.names=F,col.names=F,quote=F)
print(round(median(loball5ciA_agg_change7$rmse_hoursR),1))

print(round(median(loball5ciA_agg_change7$mean_hours),1))

#Calculations 

######
write.table(data.frame(Description=c("","Results on how even with information about the average rate of compensation or expenditures, going by the variation in expenditures can lead observers to be off by...")),row.names=F,col.names=F,quote=F)
#27.00+5.7471=32.7471
#5.7471/32.7471=0.1754995
write.table(data.frame(Description=c("","Contract")),row.names=F,col.names=F,quote=F)
print(round(median(loball5ciA_agg_change7$rmse_hoursR[loball5ciA_agg_change7$type=="contract"])/c(median(loball5ciA_agg_change7$mean_hours[loball5ciA_agg_change7$type=="contract"])+median(loball5ciA_agg_change7$rmse_hoursR[loball5ciA_agg_change7$type=="contract"])),3))
#27.00-5.7471
#5.7471/21.2529=0.2704149
print(round(median(loball5ciA_agg_change7$rmse_hoursR[loball5ciA_agg_change7$type=="contract"])/c(median(loball5ciA_agg_change7$mean_hours[loball5ciA_agg_change7$type=="contract"])-median(loball5ciA_agg_change7$rmse_hoursR[loball5ciA_agg_change7$type=="contract"])),3))
##
#2.0851+ 50.56=52.6451
#2.0851/ 52.6451=0.03960673
write.table(data.frame(Description=c("","In-House")),row.names=F,col.names=F,quote=F)
print(round(median(loball5ciA_agg_change7$rmse_hoursR[loball5ciA_agg_change7$type=="inhouse"])/c(median(loball5ciA_agg_change7$mean_hours[loball5ciA_agg_change7$type=="inhouse"])+median(loball5ciA_agg_change7$rmse_hoursR[loball5ciA_agg_change7$type=="inhouse"])),4))
#50.5625-2.0851=48.47744
#2.0851/48.47744=0.04301176
print(round(median(loball5ciA_agg_change7$rmse_hoursR[loball5ciA_agg_change7$type=="inhouse"])/c(median(loball5ciA_agg_change7$mean_hours[loball5ciA_agg_change7$type=="inhouse"])-median(loball5ciA_agg_change7$rmse_hoursR[loball5ciA_agg_change7$type=="inhouse"])),4))

write.table(data.frame(Description=c("","Median RMSE when assuming, as average rate of compensation, the median half-yearly rate of expenditure for contract and in-house lobbyists respectively.")),row.names=F,col.names=F,quote=F)
write.table(data.frame(Description=c("","Contract")),row.names=F,col.names=F,quote=F)
print(round(median(loball5ciA_agg_change7$rmse_hoursRT[loball5ciA_agg_change7$type=="contract"]),1))

write.table(data.frame(Description=c("","In-House")),row.names=F,col.names=F,quote=F)
print(round(median(loball5ciA_agg_change7$rmse_hoursRT[loball5ciA_agg_change7$type=="inhouse"]),1))


####
#compare lobbyists across type categories who get compensated similar amounts per biennium in same group
loball5ciA_agg_sd_compare <-loball5ciA %>% 
  filter(type%in%c("inhouse","contract")) %>%
  group_by(lobname_cleaned,principal_cleaned,type,session) %>%
  summarise(.groups="keep",sum_amount=sum(Rcomparable_total_amounts_cleaned_exp,na.rm=T),
            sum_hours=sum(hours,na.rm=T))

exp_q<-unlist(lapply(c(seq(0.00,1,0.05)),function(x) quantile(loball5ciA_agg_sd_compare$sum_amount,x,na.rm=T)))
c_means<-c();c_medians<-c();c_sds<-c();c_1q<-c();c_3q<-c()
i_means<-c();i_medians<-c();i_sds<-c();i_1q<-c();i_3q<-c()
for(i in 1:c(length(exp_q)-1)){
  c_means[i]<-mean(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  i_means[i]<-mean(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  c_medians[i]<-median(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  i_medians[i]<-median(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  c_sds[i]<-sd(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  i_sds[i]<-sd(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  c_1q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.25)
  i_1q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.25)
  c_3q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.75)
  i_3q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.75)
}

write.table(data.frame(Description=c("","Figure A6: Number and Relative Variability of Hours Worked Across Lobbyists, By Type of 
Lobbyist")),row.names=F,col.names=F,quote=F)

png("output/PSRM_figA6.png",width=8,height = 8,units="in",res=300)
  
  par(mfrow=c(2,2),mar=c(4,4,4,1))
  #Panel 1
  plot(main=c("(1) Number of Hours, Client-Lobbyist Dyads"),seq(1,20,1)-0.15,i_medians,col="gray",ylim=c(0,3500),xlim=c(1,20),xaxt="n",
       ylab=c("Hours Worked for Client in Biennium"),xlab="Biennium Expenditure Percentiles",xaxt="n")
  axis(1,at=seq(2,21,2),labels = c("5-\n10","15-\n20","25-\n30","35-\n40","45-\n50","55-\n60","65-\n70","75-\n80","85-\n90","95-\n100"),cex=0.55,cex.axis=0.55,cex.lab=0.55)
  legend("topleft",col=c("black","gray"),legend=c("Contract","In-House"),pch=1)
  for(i in 1:20){
    lines(c(i,i)-0.15,c(i_medians[i],i_1q[i]),col="gray",lty=3)
    lines(c(i,i)-0.15,c(i_medians[i],i_3q[i]),col="gray",lty=3)
  }
  for(i in 1:20){
    lines(c(i,i)+0.15,c(c_medians[i],c_1q[i]),col="black",lty=3)
    lines(c(i,i)+0.15,c(c_medians[i],c_3q[i]),col="black",lty=3)
  }
  points(seq(1,20,1)-0.15,i_1q,col="gray",ylim=c(0,2000))
  points(seq(1,20,1)-0.15,i_3q,col="gray",ylim=c(0,3000))
  points(seq(1,20,1)+0.15,c_medians,col="black")
  points(seq(1,20,1)+0.15,c_1q,col="black",ylim=c(0,2000))
  points(seq(1,20,1)+0.15,c_3q,col="black",ylim=c(0,3000))
  #Panel 2
  plot(main=c("(2) Coefficient of Variability (Hours),\nClient-Lobbyist Dyads"),seq(1,20,1),i_sds/i_means,ylab="Coefficient of Variability (Hours)",col="gray",ylim=c(0,3.5),xlab="Biennium Expenditure Percentiles",xaxt="n")
  points(seq(1,20,1),c_sds/c_means,col="black",ylim=c(0,1200))
  axis(1,at=seq(2,21,2),labels = c("5-\n10","15-\n20","25-\n30","35-\n40","45-\n50","55-\n60","65-\n70","75-\n80","85-\n90","95-\n100"),cex=0.55,cex.axis=0.55,cex.lab=0.55)
  legend("topright",col=c("black","gray"),legend=c("Contract","In-House"),pch=1)
  #########
  
  #compare lobbyists across type categories who get compensated similar amounts per biennium in same group
  loball5ciA_agg_sd_compare <-loball5ciA %>% 
    filter(type%in%c("inhouse","contract")) %>%
    #still need to filter by : principal-lobbyist: only one non-zero expenditure per biennium
    group_by(lobname_cleaned,type,session) %>%
    summarise(.groups="keep",sum_amount=sum(Rcomparable_total_amounts_cleaned_exp,na.rm=T),
              sum_hours=sum(hours,na.rm=T))
  
  exp_q<-unlist(lapply(c(seq(0.00,1,0.05)),function(x) quantile(loball5ciA_agg_sd_compare$sum_amount,x,na.rm=T)))
  c_means<-c();c_medians<-c();c_sds<-c();c_1q<-c();c_3q<-c()
  i_means<-c();i_medians<-c();i_sds<-c();i_1q<-c();i_3q<-c()
  for(i in 1:c(length(exp_q)-1)){
    c_means[i]<-mean(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
    i_means[i]<-mean(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
    c_medians[i]<-median(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
    i_medians[i]<-median(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
    c_sds[i]<-sd(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
    i_sds[i]<-sd(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
    c_1q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.25)
    i_1q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.25)
    c_3q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.75)
    i_3q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.75)
  }
  
  #Panel 3
  plot(main=c("(3) Number of Hours,\nLobbyist Level"),seq(1,20,1)-0.15,i_medians,col="gray",ylim=c(0,3500),ylab=c("Hours Worked in Biennium"),xlab="Biennium Expenditure Percentiles",xaxt="n")
  axis(1,at=seq(2,21,2),labels = c("5-\n10","15-\n20","25-\n30","35-\n40","45-\n50","55-\n60","65-\n70","75-\n80","85-\n90","95-\n100"),cex=0.55,cex.axis=0.55,cex.lab=0.55)
  legend("topleft",col=c("black","gray"),legend=c("Contract","In-House"),pch=1)
  for(i in 1:20){
    lines(c(i,i)-0.15,c(i_medians[i],i_1q[i]),col="gray",lty=3)
    lines(c(i,i)-0.15,c(i_medians[i],i_3q[i]),col="gray",lty=3)
  }
  for(i in 1:20){
    lines(c(i,i)+0.15,c(c_medians[i],c_1q[i]),col="black",lty=3)
    lines(c(i,i)+0.15,c(c_medians[i],c_3q[i]),col="black",lty=3)
  }
  points(seq(1,20,1)-0.15,i_1q,col="gray",ylim=c(0,2000))
  points(seq(1,20,1)-0.15,i_3q,col="gray",ylim=c(0,3000))
  points(seq(1,20,1)+0.15,c_medians,col="black")
  points(seq(1,20,1)+0.15,c_1q,col="black",ylim=c(0,2000))
  points(seq(1,20,1)+0.15,c_3q,col="black",ylim=c(0,3000))
  
  #Panel 4
  plot(main=c("(4) Coefficient of Variability (Hours),\n Lobbyist Level"),seq(1,20,1),i_sds/i_means,ylab="Coefficient of Variability (Hours)",col="gray",ylim=c(0,3.5),xlab="Biennium Expenditure Percentiles",xaxt="n")
  points(seq(1,20,1),c_sds/c_means,col="black",ylim=c(0,1200))
  legend("topright",col=c("black","gray"),legend=c("Contract","In-House"),pch=1)
  axis(1,at=seq(2,21,2),labels = c("5-\n10","15-\n20","25-\n30","35-\n40","45-\n50","55-\n60","65-\n70","75-\n80","85-\n90","95-\n100"),cex=0.55,cex.axis=0.55,cex.lab=0.55)

dev.off()

write.table(data.frame(Description=c("(N.B. The file for this figure is saved as 'PSRM_figA6.png' in the 'output' folder.)")),row.names=F,col.names=F,quote=F)

#Include code again so that Figure A6 is also shown in R if code file is run using source()
par(mfrow=c(2,2),mar=c(4,4,4,1))
#Panel 1
plot(main=c("(1) Number of Hours, Client-Lobbyist Dyads"),seq(1,20,1)-0.15,i_medians,col="gray",ylim=c(0,3500),xlim=c(1,20),xaxt="n",
     ylab=c("Hours Worked for Client in Biennium"),xlab="Biennium Expenditure Percentiles",xaxt="n")
axis(1,at=seq(2,21,2),labels = c("5-\n10","15-\n20","25-\n30","35-\n40","45-\n50","55-\n60","65-\n70","75-\n80","85-\n90","95-\n100"),cex=0.55,cex.axis=0.55,cex.lab=0.55)
legend("topleft",col=c("black","gray"),legend=c("Contract","In-House"),pch=1)
for(i in 1:20){
  lines(c(i,i)-0.15,c(i_medians[i],i_1q[i]),col="gray",lty=3)
  lines(c(i,i)-0.15,c(i_medians[i],i_3q[i]),col="gray",lty=3)
}
for(i in 1:20){
  lines(c(i,i)+0.15,c(c_medians[i],c_1q[i]),col="black",lty=3)
  lines(c(i,i)+0.15,c(c_medians[i],c_3q[i]),col="black",lty=3)
}
points(seq(1,20,1)-0.15,i_1q,col="gray",ylim=c(0,2000))
points(seq(1,20,1)-0.15,i_3q,col="gray",ylim=c(0,3000))
points(seq(1,20,1)+0.15,c_medians,col="black")
points(seq(1,20,1)+0.15,c_1q,col="black",ylim=c(0,2000))
points(seq(1,20,1)+0.15,c_3q,col="black",ylim=c(0,3000))
#Panel 2
plot(main=c("(2) Coefficient of Variability (Hours),\nClient-Lobbyist Dyads"),seq(1,20,1),i_sds/i_means,ylab="Coefficient of Variability (Hours)",col="gray",ylim=c(0,3.5),xlab="Biennium Expenditure Percentiles",xaxt="n")
points(seq(1,20,1),c_sds/c_means,col="black",ylim=c(0,1200))
axis(1,at=seq(2,21,2),labels = c("5-\n10","15-\n20","25-\n30","35-\n40","45-\n50","55-\n60","65-\n70","75-\n80","85-\n90","95-\n100"),cex=0.55,cex.axis=0.55,cex.lab=0.55)
legend("topright",col=c("black","gray"),legend=c("Contract","In-House"),pch=1)
#########

#compare lobbyists across type categories who get compensated similar amounts per biennium in same group
loball5ciA_agg_sd_compare <-loball5ciA %>% 
  filter(type%in%c("inhouse","contract")) %>%
  #still need to filter by : principal-lobbyist: only one non-zero expenditure per biennium
  group_by(lobname_cleaned,type,session) %>%
  summarise(.groups="keep",sum_amount=sum(Rcomparable_total_amounts_cleaned_exp,na.rm=T),
            sum_hours=sum(hours,na.rm=T))

exp_q<-unlist(lapply(c(seq(0.00,1,0.05)),function(x) quantile(loball5ciA_agg_sd_compare$sum_amount,x,na.rm=T)))
c_means<-c();c_medians<-c();c_sds<-c();c_1q<-c();c_3q<-c()
i_means<-c();i_medians<-c();i_sds<-c();i_1q<-c();i_3q<-c()
for(i in 1:c(length(exp_q)-1)){
  c_means[i]<-mean(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  i_means[i]<-mean(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  c_medians[i]<-median(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  i_medians[i]<-median(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  c_sds[i]<-sd(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  i_sds[i]<-sd(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T)
  c_1q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.25)
  i_1q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.25)
  c_3q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="contract"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.75)
  i_3q[i]<-quantile(loball5ciA_agg_sd_compare$sum_hours[which(loball5ciA_agg_sd_compare$type=="inhouse"&loball5ciA_agg_sd_compare$sum_amount>exp_q[i]&loball5ciA_agg_sd_compare$sum_amount<=exp_q[i+1])],na.rm=T,0.75)
}

#Panel 3
plot(main=c("(3) Number of Hours,\nLobbyist Level"),seq(1,20,1)-0.15,i_medians,col="gray",ylim=c(0,3500),ylab=c("Hours Worked in Biennium"),xlab="Biennium Expenditure Percentiles",xaxt="n")
axis(1,at=seq(2,21,2),labels = c("5-\n10","15-\n20","25-\n30","35-\n40","45-\n50","55-\n60","65-\n70","75-\n80","85-\n90","95-\n100"),cex=0.55,cex.axis=0.55,cex.lab=0.55)
legend("topleft",col=c("black","gray"),legend=c("Contract","In-House"),pch=1)
for(i in 1:20){
  lines(c(i,i)-0.15,c(i_medians[i],i_1q[i]),col="gray",lty=3)
  lines(c(i,i)-0.15,c(i_medians[i],i_3q[i]),col="gray",lty=3)
}
for(i in 1:20){
  lines(c(i,i)+0.15,c(c_medians[i],c_1q[i]),col="black",lty=3)
  lines(c(i,i)+0.15,c(c_medians[i],c_3q[i]),col="black",lty=3)
}
points(seq(1,20,1)-0.15,i_1q,col="gray",ylim=c(0,2000))
points(seq(1,20,1)-0.15,i_3q,col="gray",ylim=c(0,3000))
points(seq(1,20,1)+0.15,c_medians,col="black")
points(seq(1,20,1)+0.15,c_1q,col="black",ylim=c(0,2000))
points(seq(1,20,1)+0.15,c_3q,col="black",ylim=c(0,3000))

#Panel 4
plot(main=c("(4) Coefficient of Variability (Hours),\n Lobbyist Level"),seq(1,20,1),i_sds/i_means,ylab="Coefficient of Variability (Hours)",col="gray",ylim=c(0,3.5),xlab="Biennium Expenditure Percentiles",xaxt="n")
points(seq(1,20,1),c_sds/c_means,col="black",ylim=c(0,1200))
legend("topright",col=c("black","gray"),legend=c("Contract","In-House"),pch=1)
axis(1,at=seq(2,21,2),labels = c("5-\n10","15-\n20","25-\n30","35-\n40","45-\n50","55-\n60","65-\n70","75-\n80","85-\n90","95-\n100"),cex=0.55,cex.axis=0.55,cex.lab=0.55)

